!pip install geopandas
!pip install contextily
!pip install seaborn as sns
!pip install folium
!pip install sodapy
!pip install ipywidgets
!pip install dabl
import zipfile
import geopandas as gpd
import contextily as ctx
from shapely.geometry import Point
import pandas as pd
import folium
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import pandas as pd
from sodapy import Socrata
import string
import ipywidgets as widgets
from dabl.plot import mosaic_plot
Requirement already satisfied: geopandas in /opt/conda/lib/python3.8/site-packages (0.12.1) Requirement already satisfied: fiona>=1.8 in /opt/conda/lib/python3.8/site-packages (from geopandas) (1.8.18) Requirement already satisfied: packaging in /opt/conda/lib/python3.8/site-packages (from geopandas) (22.0) Requirement already satisfied: pyproj>=2.6.1.post1 in /opt/conda/lib/python3.8/site-packages (from geopandas) (2.6.1.post1) Requirement already satisfied: pandas>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from geopandas) (1.1.2) Requirement already satisfied: shapely>=1.7 in /opt/conda/lib/python3.8/site-packages (from geopandas) (1.7.1) Requirement already satisfied: attrs>=17 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (20.2.0) Requirement already satisfied: certifi in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2022.9.24) Requirement already satisfied: click<8,>=4.0 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (7.1.2) Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (0.7.2) Requirement already satisfied: click-plugins>=1.0 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.1.1) Requirement already satisfied: six>=1.7 in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (1.15.0) Requirement already satisfied: munch in /opt/conda/lib/python3.8/site-packages (from fiona>=1.8->geopandas) (2.5.0) Requirement already satisfied: numpy>=1.15.4 in /opt/conda/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (1.19.1) Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2020.1) Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=1.0.0->geopandas) (2.8.1) Requirement already satisfied: contextily in /opt/conda/lib/python3.8/site-packages (1.2.0) 
Requirement already satisfied: pillow in /opt/conda/lib/python3.8/site-packages (from contextily) (7.2.0) Requirement already satisfied: mercantile in /opt/conda/lib/python3.8/site-packages (from contextily) (1.2.1) Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from contextily) (0.17.0) Requirement already satisfied: xyzservices in /opt/conda/lib/python3.8/site-packages (from contextily) (2022.9.0) Requirement already satisfied: geopy in /opt/conda/lib/python3.8/site-packages (from contextily) (2.3.0) Requirement already satisfied: matplotlib in /opt/conda/lib/python3.8/site-packages (from contextily) (3.3.2) Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from contextily) (2.28.1) Requirement already satisfied: rasterio in /opt/conda/lib/python3.8/site-packages (from contextily) (1.2.1) Requirement already satisfied: click>=3.0 in /opt/conda/lib/python3.8/site-packages (from mercantile->contextily) (7.1.2) Requirement already satisfied: geographiclib<3,>=1.52 in /opt/conda/lib/python3.8/site-packages (from geopy->contextily) (1.52) Requirement already satisfied: numpy>=1.15 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (1.19.1) Requirement already satisfied: certifi>=2020.06.20 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (2022.9.24) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (2.4.7) Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (0.10.0) Requirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (2.8.1) Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->contextily) (1.2.0) Requirement already satisfied: charset-normalizer<3,>=2 in 
/opt/conda/lib/python3.8/site-packages (from requests->contextily) (2.1.1) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->contextily) (2.10) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->contextily) (1.25.10) Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (0.7.2) Requirement already satisfied: affine in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (2.3.1) Requirement already satisfied: click-plugins in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (1.1.1) Requirement already satisfied: attrs in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (20.2.0) Requirement already satisfied: snuggs>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from rasterio->contextily) (1.4.7) Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from cycler>=0.10->matplotlib->contextily) (1.15.0) Requirement already satisfied: seaborn in /opt/conda/lib/python3.8/site-packages (0.11.0) ERROR: Could not find a version that satisfies the requirement as (from versions: none) ERROR: No matching distribution found for as Requirement already satisfied: folium in /opt/conda/lib/python3.8/site-packages (0.13.0) Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from folium) (2.28.1) Requirement already satisfied: branca>=0.3.0 in /opt/conda/lib/python3.8/site-packages (from folium) (0.6.0) Requirement already satisfied: jinja2>=2.9 in /opt/conda/lib/python3.8/site-packages (from folium) (2.11.2) Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from folium) (1.19.1) Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2.1.1) Requirement already satisfied: idna<4,>=2.5 in 
/opt/conda/lib/python3.8/site-packages (from requests->folium) (2.10) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (1.25.10) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->folium) (2022.9.24) Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.8/site-packages (from jinja2>=2.9->folium) (1.1.1) Requirement already satisfied: sodapy in /opt/conda/lib/python3.8/site-packages (2.2.0) Requirement already satisfied: requests>=2.28.1 in /opt/conda/lib/python3.8/site-packages (from sodapy) (2.28.1) Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (2022.9.24) Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (2.10) Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (2.1.1) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests>=2.28.1->sodapy) (1.25.10) Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.8/site-packages (7.5.1) Requirement already satisfied: ipython>=4.0.0; python_version >= "3.3" in /opt/conda/lib/python3.8/site-packages (from ipywidgets) (7.18.1) Requirement already satisfied: nbformat>=4.2.0 in /opt/conda/lib/python3.8/site-packages (from ipywidgets) (5.0.7) Requirement already satisfied: widgetsnbextension~=3.5.0 in /opt/conda/lib/python3.8/site-packages (from ipywidgets) (3.5.1) Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.8/site-packages (from ipywidgets) (5.3.4) Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.8/site-packages (from ipywidgets) (5.0.4) Requirement already satisfied: pexpect>4.3; sys_platform != "win32" in 
/opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (4.8.0) Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (3.0.7) Requirement already satisfied: jedi>=0.10 in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.17.2) Requirement already satisfied: pickleshare in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.7.5) Requirement already satisfied: decorator in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (4.4.2) Requirement already satisfied: pygments in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (2.7.1) Requirement already satisfied: setuptools>=18.5 in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (49.6.0.post20200917) Requirement already satisfied: backcall in /opt/conda/lib/python3.8/site-packages (from ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.2.0) Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /opt/conda/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets) (3.2.0) Requirement already satisfied: jupyter-core in /opt/conda/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets) (4.6.3) Requirement already satisfied: ipython-genutils in /opt/conda/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets) (0.2.0) Requirement already satisfied: notebook>=4.4.1 in /opt/conda/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets) (6.1.4) Requirement already satisfied: tornado>=4.2 in /opt/conda/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.0.4) Requirement already satisfied: jupyter-client in /opt/conda/lib/python3.8/site-packages (from 
ipykernel>=4.5.1->ipywidgets) (6.1.7) Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.8/site-packages (from pexpect>4.3; sys_platform != "win32"->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.6.0) Requirement already satisfied: wcwidth in /opt/conda/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.2.5) Requirement already satisfied: parso<0.8.0,>=0.7.0 in /opt/conda/lib/python3.8/site-packages (from jedi>=0.10->ipython>=4.0.0; python_version >= "3.3"->ipywidgets) (0.7.1) Requirement already satisfied: six>=1.11.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (1.15.0) Requirement already satisfied: attrs>=17.4.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (20.2.0) Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (0.17.3) Requirement already satisfied: terminado>=0.8.3 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.9.1) Requirement already satisfied: argon2-cffi in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (20.1.0) Requirement already satisfied: Send2Trash in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.0) Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.0) Requirement already satisfied: jinja2 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.11.2) Requirement already satisfied: nbconvert in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) 
(6.0.7) Requirement already satisfied: pyzmq>=17 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (19.0.2) Requirement already satisfied: python-dateutil>=2.1 in /opt/conda/lib/python3.8/site-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.1) Requirement already satisfied: cffi>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.14.3) Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.8/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.1.1) Requirement already satisfied: defusedxml in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.6.0) Requirement already satisfied: bleach in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (3.2.1) Requirement already satisfied: entrypoints>=0.2.2 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.3) Requirement already satisfied: testpath in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.4.4) Requirement already satisfied: mistune<2,>=0.8.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.4) Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.0) Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.4.2) Requirement already satisfied: jupyterlab-pygments in /opt/conda/lib/python3.8/site-packages (from 
nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.1.2) Requirement already satisfied: pycparser in /opt/conda/lib/python3.8/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.20) Requirement already satisfied: webencodings in /opt/conda/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.1) Requirement already satisfied: packaging in /opt/conda/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (22.0) Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.4.1) Requirement already satisfied: async-generator in /opt/conda/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.10) Collecting dabl Downloading dabl-0.2.4-py3-none-any.whl (563 kB) |████████████████████████████████| 563 kB 7.4 MB/s eta 0:00:01 Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (from dabl) (1.1.2) Collecting matplotlib>=3.4 Downloading matplotlib-3.6.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.4 MB) |████████████████████████████████| 9.4 MB 29.2 MB/s eta 0:00:01 Requirement already satisfied: scipy in /opt/conda/lib/python3.8/site-packages (from dabl) (1.5.2) Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from dabl) (1.19.1) Collecting scikit-learn>=1.0 Downloading scikit_learn-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.7 MB) |████████████████████████████████| 9.7 MB 42.9 MB/s eta 0:00:01 Requirement already satisfied: seaborn in /opt/conda/lib/python3.8/site-packages (from dabl) (0.11.0) Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from 
pandas->dabl) (2.8.1) Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.8/site-packages (from pandas->dabl) (2020.1) Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from matplotlib>=3.4->dabl) (7.2.0) Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.8/site-packages (from matplotlib>=3.4->dabl) (22.0) Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib>=3.4->dabl) (1.2.0) Collecting fonttools>=4.22.0 Downloading fonttools-4.38.0-py3-none-any.whl (965 kB) |████████████████████████████████| 965 kB 21.1 MB/s eta 0:00:01 Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib>=3.4->dabl) (0.10.0) Collecting contourpy>=1.0.1 Downloading contourpy-1.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (295 kB) |████████████████████████████████| 295 kB 46.7 MB/s eta 0:00:01 Requirement already satisfied: pyparsing>=2.2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib>=3.4->dabl) (2.4.7) Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from scikit-learn>=1.0->dabl) (2.1.0) Collecting joblib>=1.1.1 Downloading joblib-1.2.0-py3-none-any.whl (297 kB) |████████████████████████████████| 297 kB 1.6 MB/s eta 0:00:01 Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas->dabl) (1.15.0) Installing collected packages: fonttools, contourpy, matplotlib, joblib, scikit-learn, dabl Attempting uninstall: matplotlib Found existing installation: matplotlib 3.3.2 Uninstalling matplotlib-3.3.2: Successfully uninstalled matplotlib-3.3.2 Attempting uninstall: joblib Found existing installation: joblib 0.17.0 Uninstalling joblib-0.17.0: Successfully uninstalled joblib-0.17.0 Attempting uninstall: scikit-learn Found existing installation: scikit-learn 0.23.2 Uninstalling 
scikit-learn-0.23.2: Successfully uninstalled scikit-learn-0.23.2 Successfully installed contourpy-1.0.6 dabl-0.2.4 fonttools-4.38.0 joblib-1.2.0 matplotlib-3.6.2 scikit-learn-1.2.0
# The client is created without credentials: the second argument (the
# application token) is None and no username or password is given. Such a
# client can only read public data sets.
client = Socrata("data.sfgov.org", None)

# Request up to one million records of data set "wg3w-h783" in one call.
results = client.get("wg3w-h783", limit=1_000_000)

# Turn the returned records into a DataFrame. ``df`` and ``df1`` are two
# names for the same object, not independent copies.
df = df1 = pd.DataFrame.from_records(results)
WARNING:root:Requests made without an app_token will be subject to strict throttling limits.
# Point the working name ``df`` at the loaded frame (same object, no copy).
df = df1
# Show the column labels of the frame.
df.columns
Index(['incident_datetime', 'incident_date', 'incident_time', 'incident_year',
'incident_day_of_week', 'report_datetime', 'row_id', 'incident_id',
'incident_number', 'report_type_code', 'report_type_description',
'filed_online', 'incident_code', 'incident_category',
'incident_subcategory', 'incident_description', 'resolution',
'police_district', 'cad_number', 'intersection', 'cnn',
'analysis_neighborhood', 'supervisor_district', 'latitude', 'longitude',
'point', ':@computed_region_jwn9_ihcz', ':@computed_region_26cr_cadq',
':@computed_region_qgnn_b9vv', ':@computed_region_nqbw_i6c3',
':@computed_region_h4ep_8xdi', ':@computed_region_n4xg_c4py',
':@computed_region_jg9y_a9du'],
dtype='object')
# Columns removed from the working frame: the ":@computed_region_*" columns
# and the "point" column.
unused_columns = [
    ':@computed_region_jwn9_ihcz',
    ':@computed_region_26cr_cadq',
    ':@computed_region_qgnn_b9vv',
    ':@computed_region_nqbw_i6c3',
    ':@computed_region_h4ep_8xdi',
    ':@computed_region_n4xg_c4py',
    ':@computed_region_jg9y_a9du',
    'point',
]
df.drop(columns=unused_columns, inplace=True)

# Parse the date-like columns into datetime64 values. With errors='coerce',
# anything that cannot be parsed becomes NaT instead of raising.
for date_column in ('incident_datetime', 'incident_year',
                    'incident_date', 'report_datetime'):
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce')

# Show the resulting dtype of every column.
df.dtypes
incident_datetime datetime64[ns] incident_date datetime64[ns] incident_time object incident_year datetime64[ns] incident_day_of_week object report_datetime datetime64[ns] row_id object incident_id object incident_number object report_type_code object report_type_description object filed_online object incident_code object incident_category object incident_subcategory object incident_description object resolution object police_district object cad_number object intersection object cnn object analysis_neighborhood object supervisor_district object latitude object longitude object dtype: object
def o_str(value) -> str:
    """Return ``value`` converted with ``str()``.

    Missing values get no special treatment: ``float('nan')`` becomes the
    literal string ``'nan'`` and ``None`` becomes ``'None'``.
    """
    return str(value)
def o_date(value) -> str:
    """Return ``value`` converted with ``str()``.

    NOTE(review): despite its name, this helper performs no date parsing; it
    returns the plain string form of ``value``, exactly like ``o_str``.
    """
    return str(value)
def o_numeric(value) -> float:
    """Return ``value`` converted with ``float()``.

    Raises:
        ValueError: if ``value`` is a string that is not a valid number.
        TypeError: if ``value`` is of a type ``float()`` cannot convert,
            such as ``None``.
    """
    return float(value)
# Identifier and coordinate columns are object-dtype after loading; convert
# them to float64 in one vectorised step per column. Missing values stay NaN.
numeric_columns = [
    'incident_id', 'row_id', 'incident_code', 'incident_number',
    'cad_number', 'cnn', 'latitude', 'longitude',
]
for column in numeric_columns:
    df[column] = df[column].astype(float)

# Categorical/text columns: convert only the values that are present.
# Calling str() on every value would turn NaN into the literal text 'nan',
# which isnull()/fillna() no longer recognise as missing.
text_columns = [
    'report_type_description', 'incident_category', 'incident_subcategory',
    'resolution', 'police_district', 'analysis_neighborhood',
]
for column in text_columns:
    values = df[column]
    # where() keeps entries for which the condition holds (the missing ones)
    # and takes the stringified value everywhere else.
    df[column] = values.where(values.isna(), values.astype(str))

# Show the distinct resolution values.
df.resolution.unique()
array(['Open or Active', 'Cite or Arrest Adult', 'Exceptional Adult',
'Unfounded'], dtype=object)
# Show the distinct values of the police_district column.
df.police_district.unique()
array(['Southern', 'Out of SF', 'Central', 'Mission', 'Richmond',
'Ingleside', 'Park', 'Northern', 'Bayview', 'Tenderloin',
'Taraval'], dtype=object)
# Show the distinct values of the report_type_description column.
df.report_type_description.unique()
array(['Coplogic Initial', 'Vehicle Supplement', 'Initial Supplement',
'Initial', 'Vehicle Initial', 'Coplogic Supplement'], dtype=object)
# Show the distinct values of the analysis_neighborhood column.
df.analysis_neighborhood.unique()
array(['nan', 'Excelsior', 'Russian Hill', 'Lone Mountain/USF',
'Pacific Heights', 'Bayview Hunters Point', 'Glen Park', 'Mission',
'Tenderloin', 'Hayes Valley', 'Western Addition', 'Outer Mission',
'Marina', 'South of Market', 'Japantown', 'McLaren Park',
'Presidio Heights', 'Mission Bay', 'Nob Hill', 'North Beach',
'Financial District/South Beach', 'West of Twin Peaks',
'Chinatown', 'Haight Ashbury', 'Castro/Upper Market',
'Sunset/Parkside', 'Potrero Hill', 'Lakeshore', 'Outer Richmond',
'Inner Sunset', 'Twin Peaks', 'Oceanview/Merced/Ingleside',
'Portola', 'Presidio', 'Bernal Heights', 'Golden Gate Park',
'Noe Valley', 'Visitacion Valley', 'Inner Richmond',
'Treasure Island', 'Lincoln Park', 'Seacliff', 'null'],
dtype=object)
# Fill missing 'filed_online' values with 'In Person': as specified on the
# data set's website, a row without a filed-online value is assumed to be a
# report that was filed in person.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the selected Series, because the chained in-place
# form is deprecated in newer pandas and does not update the frame under
# Copy-on-Write.
df['filed_online'] = df['filed_online'].fillna('In Person')

# Show the dtype of every column.
df.dtypes
incident_datetime datetime64[ns] incident_date datetime64[ns] incident_time object incident_year datetime64[ns] incident_day_of_week object report_datetime datetime64[ns] row_id float64 incident_id float64 incident_number float64 report_type_code object report_type_description object filed_online object incident_code float64 incident_category object incident_subcategory object incident_description object resolution object police_district object cad_number float64 intersection object cnn float64 analysis_neighborhood object supervisor_district object latitude float64 longitude float64 dtype: object
# Count the missing values in every column.
df.isna().sum()
incident_datetime 0 incident_date 0 incident_time 0 incident_year 0 incident_day_of_week 0 report_datetime 0 row_id 0 incident_id 0 incident_number 0 report_type_code 0 report_type_description 0 filed_online 0 incident_code 0 incident_category 0 incident_subcategory 0 incident_description 0 resolution 0 police_district 0 cad_number 150519 intersection 35674 cnn 35674 analysis_neighborhood 0 supervisor_district 35674 latitude 35674 longitude 35674 dtype: int64
# Fill the remaining missing values; no column is dropped here.

# Numeric (float64) columns get a numeric 0. Filling them with the string
# '0' would mix text into the column and turn it into object dtype.
for column in ('cad_number', 'cnn'):
    df[column] = df[column].fillna(0)

# Text columns get the placeholder string '0'.
for column in ('intersection', 'supervisor_district', 'analysis_neighborhood'):
    df[column] = df[column].fillna('0')

# Missing coordinates are replaced by the mean of the respective column.
for column in ('latitude', 'longitude'):
    df[column] = df[column].fillna(df[column].mean())

# Renumber the rows 0..n-1 in place. reindex() without arguments only
# returns a copy and leaves ``df`` unchanged, so reset_index is used.
df.reset_index(drop=True, inplace=True)

# Show the (rows, columns) shape of the frame.
df.shape
(673198, 25)
# Give the category column a display-friendly name; it is used as the axis
# label of the plot below.
df.rename(columns={'incident_category': 'Incident Category'}, inplace=True)

# Bar plot showing the number of incidents per category in San Francisco.
plt.rcParams['figure.figsize'] = (20, 9)
plt.style.use('fast')
# Pass the data as the keyword argument ``x``: positional data arguments are
# deprecated in seaborn 0.11 and rejected from 0.12 on.
sns.countplot(x=df['Incident Category'], palette='hot')
plt.title('Major Crimes in San Francisco', fontweight = 20, fontsize = 20)
# Category names are long; rotate them so they do not overlap.
plt.xticks(rotation = 90)
plt.show()
/opt/conda/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Share of incidents per day of the week, drawn as a donut chart.
day_counts = df['incident_day_of_week'].value_counts()
day_counts.plot.pie(
    figsize=(10, 20),
    explode=(0.1,) * 7,  # pull each of the seven wedges out by the same amount
    autopct='%1.1f%%',
)
plt.axis('off')
plt.title('Crime count on each day', fontsize=20)

# A white disc over the centre of the pie turns it into a donut.
fig = plt.gcf()
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
fig.gca().add_artist(centre_circle)

plt.xticks(rotation=90)
plt.show()
# Make sure incident_year holds timestamps (a no-op if it already does).
df['incident_year'] = pd.to_datetime(df['incident_year'])
# Keep only the four-digit year as the label. Formatting with '%Y-%m-%d'
# would label every group with a full date such as '2021-01-01'.
df['year'] = df['incident_year'].dt.strftime('%Y').astype(str)
df.head()

# Histogram of incident categories, coloured by year, with one animation
# frame per police district and a box plot in the margin.
fig = px.histogram(data_frame = df, color = 'year', x='Incident Category', animation_frame='police_district',
                   barmode='overlay', marginal='box', height = 900, width=900)
fig.show('notebook')